library(tidyverse)
library(texreg)

# Load the replication datasets.
# The working directory must be the folder that contains these CSV files.
# `d` feeds the respondent-level regressions and histogram below;
# `dpol` feeds the policy-level figures and regressions.
d <- read_csv(file = "individual_level_data.csv")
dpol <- read_csv(file = "policy_level_data.csv")

# Axis labels for the plots: names are the policy identifiers found in the
# `name` column of the policy-level data, values are the text to display.
policy_labels <- c(
  obamacare = "Obamacare",
  imm_wall = "Build Wall",
  teaparty = "Tea Party",
  schoolfund = "Fund Schools",
  gaymarr = "Gay Marriage",
  party = "Party ID",
  blm = "Black Lives Matter",
  imm_muslim = "Muslim Ban",
  enviroregs = "Environmental Regs.",
  frack = "Fracking",
  marijuana = "Marijuana",
  wage = "Minimum Wage",
  abortion = "Abortion",
  trans = "Trans Bathroom Ban",
  unions = "Unions",
  deathpen = "Death Penalty",
  climate = "Climate Change",
  socsec = "Social Security",
  foodlabels = "GMO Labels",
  imm_path = "Path to Citizenship",
  commoncore = "Common Core",
  casinos = "Casinos",
  citizensunited = "Citizens United"
)


#### Figure 1 ####
# Dot plot of `value` (accuracy) for each policy, with policies ordered by
# accuracy. Point shape encodes which of three equal-count bins of `cp`
# (cross-pressure) the policy falls in; the dashed line marks 0.5.
ggplot(dpol, aes(x = value, y = fct_reorder(name, value))) +
  geom_point(aes(shape = cut_number(cp, n = 3))) +
  geom_vline(xintercept = 0.5, linetype = 2) +
  scale_shape_discrete(
    name = "Cross-Pressure",
    labels = c("Low", "Mid", "High")
  ) +
  scale_y_discrete(labels = policy_labels) +
  coord_cartesian(xlim = c(0, 1)) +
  labs(
    x = "Accuracy (Proportion Correct)",
    y = "Policy"
  ) +
  theme_bw() +
  theme(text = element_text(size = 12))


#### Figure 2 ####
# Dot plot of `cp` (proportion cross-pressured) for each policy, with policies
# ordered by that proportion. The x-axis view is limited to [0, 0.4].
ggplot(dpol, aes(x = cp, y = fct_reorder(name, cp))) +
  geom_point() +
  scale_y_discrete(labels = policy_labels) +
  coord_cartesian(xlim = c(0, 0.4)) +
  labs(
    x = "Proportion Cross-Pressured on Policy",
    y = "Policy"
  ) +
  theme_bw() +
  theme(text = element_text(size = 12))


#### Figure 3 ####
# Scatter plot of accuracy (`value`) against the proportion cross-pressured
# (`cp`) across policies, with a linear-model fit line and no confidence band.
# The "casinos", "foodlabels" and "party" rows are dropped before plotting.
# NOTE(review): the slope and adjusted R-squared shown on the plot are
# hard-coded text, not computed from the plotted data -- confirm they still
# match the fit whenever the data or the exclusion list changes.
dpol |>
  filter(!name %in% c("casinos", "foodlabels", "party")) |>
  ggplot(aes(cp, value)) +
  geom_point() +
  geom_smooth(method = "lm", colour = "black", se = FALSE) +
  labs(x = "Proportion Cross-Pressured on Policy",
       y = "Proportion Correctly Predicted") +
  theme_bw() +
  annotate("text", x = .31, y = .75, label = "Slope = -0.966") +
  annotate("text", x = .32, y = .71, label = "Adj. R-squared = 0.30") +
  theme(text = element_text(size = 12))


#### Table 1 ####

# Accuracy model (table 1): linear regression of `correct_pct` on party-ID
# strength, political interest, cross-pressure and respondent covariates.
# Rows with a missing `pid2` are excluded (per the table note, this model
# omits independents).
m_cor <- lm(correct_pct ~ pid_strength + polint + cross_pct +
                  pid7 + age + white + female + 
                  educ + religiosity + party_reg_closed,
            data = d, subset = !is.na(pid2))
summary(m_cor)

# Missingness model (table 1): same right-hand side, with `missing_pct` as the
# outcome and no subsetting.
m_miss <- lm(missing_pct ~ pid_strength + polint + cross_pct +
                   pid7 + age + white + female + 
                   educ + religiosity + party_reg_closed,
             data = d)
summary(m_miss)

# Maps coefficient names to display labels; the order here is also the row
# order of the table. Terms not listed (e.g. the intercept) are left out.
coef_map <- list("cross_pct" = "Cross-pressured",
                 "pid_strength" = "Strength of Party ID", 
                 "polint" = "Political Interest", 
                 "pid7" = "Party ID (7-pt)",
                 "age" = "Age", 
                 "white" = "White",
                 "female" = "Female",
                 "educ" = "Education", 
                 "religiosity" = "Religiosity", 
                 "party_reg_closed" = "Closed Primary State")

# Print both models side by side as a plain-text table.
# NOTE(review): threeparttable, use.packages, booktabs, label, caption,
# float.pos and dcolumn are options of the LaTeX writer texreg(); screenreg()
# presumably ignores them, and the "\\item" markers in the note will print
# literally -- confirm whether texreg() was the intended call.
screenreg(list(m_cor, m_miss), 
               stars = c(0.001, 0.01, 0.05, 0.1), threeparttable = TRUE,
               use.packages = FALSE, single.row = TRUE,
               symbol = "+", label = "tab_r_level", booktabs = TRUE, 
               custom.model.names = c("Accuracy", "Missingness"),
               custom.coef.map = coef_map, 
               include.rsquared = FALSE,
               caption = "Correlates of Respondents' Rates of Accurate and Missing Predictions",
               float.pos = "h", digits = 3, dcolumn = TRUE,
               custom.note = "\\item %stars \\item Estimates of linear 
               regression models of the percent of respondents' 1) correctly predicted policy positions 
               and 2) missing predictions. Accuracy model omits independents. Standard errors in parentheses.")





#### Appendix ####

##### Figure A1 #####
# Same layout as Figure 1 (accuracy by policy, shape = three equal-count bins
# of `cp`), with points additionally coloured by the `gender` column.

dpolgender <- read_csv("policy_level_gender_data.csv")

dpolgender |> ggplot(aes(value, fct_reorder(name, value))) +
      geom_point(aes(shape = cut_number(cp, 3), 
                     color = gender)) +
      geom_vline(xintercept = .5, linetype = 2) +
      labs(x = "Accuracy (Proportion Correct)", 
           y = "Policy") +
      scale_shape_discrete(name = "Cross-Pressure", 
                           labels = c("Low", "Mid", "High")) +
      scale_y_discrete(labels = policy_labels) +
      # scale_color_manual() rather than scale_color_discrete(palette = ...):
      # the `palette` argument is only accepted by ggplot2 >= 4.0.0 and raises
      # an error on earlier releases. Colours are assigned to the levels of
      # `gender` in order, as before.
      scale_color_manual(name = "Gender", 
                         values = c("red", "blue")) +
      theme_bw() +
      coord_cartesian(xlim = c(0,1)) + 
      theme(text = element_text(size = 12))

##### Figure A2 #####
# Same layout as Figure 1 (accuracy by policy, shape = three equal-count bins
# of `cp`), with points additionally coloured by the `race` column.

dpolrace <- read_csv("policy_level_race_data.csv")

dpolrace |> ggplot(aes(value, fct_reorder(name, value))) +
      geom_point(aes(shape = cut_number(cp, 3), 
                     color = race)) +
      geom_vline(xintercept = .5, linetype = 2) +
      labs(x = "Accuracy (Proportion Correct)", 
           y = "Policy") +
      scale_shape_discrete(name = "Cross-Pressure", 
                           labels = c("Low", "Mid", "High")) +
      scale_y_discrete(labels = policy_labels) +
      # scale_color_manual() rather than scale_color_discrete(palette = ...):
      # the `palette` argument is only accepted by ggplot2 >= 4.0.0 and raises
      # an error on earlier releases. Colours are assigned to the levels of
      # `race` in order, as before.
      scale_color_manual(name = "Race", 
                         values = c("darkgreen", "purple")) +
      theme_bw() +
      coord_cartesian(xlim = c(0,1)) + 
      theme(text = element_text(size = 12))


##### Figure A3 #####
# Histogram of `predictions_total` across respondents, one bar per integer
# value (binwidth = 1); white outlines separate adjacent bars.
d |> ggplot(aes(predictions_total)) + 
      geom_histogram(binwidth = 1, color = "white") +
      labs(x = "Number of Valid Policy-Position Predictions",
           y = "Count") + 
      theme_bw()


##### Table D3 #####

# Issues left out of both issue-level regressions. Defined once so the two
# models cannot drift apart.
issue_exclusions <- c("foodlabels", "casinos", 
                      "party", "socialviews", 
                      "fiscalviews")

# regress accuracy on cross-pressuredness, controlling for `n`
# (labelled "Valid Predictions" in the table)
m_cross <- lm(value ~ cp + n, data = dpol, 
              subset = !name %in% issue_exclusions)

# regress missingness on cross-pressuredness
m_iss_miss <- lm(ms ~ cp, data = dpol, 
                 subset = !name %in% issue_exclusions)

# Print both models side by side as a plain-text table. The three coefficient
# names cover the union of terms across the models: intercept, `cp`, `n`.
# NOTE(review): threeparttable, use.packages, booktabs, label, caption,
# float.pos and dcolumn are options of the LaTeX writer texreg(); screenreg()
# presumably ignores them, and the "\\item" markers in the note will print
# literally -- confirm whether texreg() was the intended call.
texreg::screenreg(list(m_cross, m_iss_miss), 
               stars = c(0.001, 0.01, 0.05, 0.1), threeparttable = TRUE,
               use.packages = FALSE, single.row = TRUE,
               symbol = "+", label = "tab_iss_cr_ms", booktabs = TRUE, 
               custom.model.names = c("Accuracy", "Missingness"),
               custom.coef.names = c("Intercept", "Cross-Pressured", "Valid Predictions"), 
               include.rsquared = FALSE,
               caption = "Issue-Level Accuracy Regressed on Cross-Pressure",
               float.pos = "h", digits = 3, dcolumn = TRUE,
               custom.note = "\\item %stars \\item Estimates of linear 
               regression models of 1) the proportion of correctly predicted respondents per issue and 2) 
               the proportion of respondents with a missing prediction per issue on the proportion 
               cross-pressured and the total number of respondents with valid responses.
               Standard errors in parentheses.")


